package io.github.mosiki.modules.spider.processor;

import io.github.mosiki.modules.spider.config.DetailxpathConfig;
import io.github.mosiki.modules.spider.config.RuleMatchConfig;
import io.github.mosiki.modules.spider.config.WebMagicConfig;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;

import java.util.List;

/**
 * Page processor that treats a page either as a JSON list page or as a detail
 * page, depending on whether its URL matches the list regex of the configured
 * {@link RuleMatchConfig}.
 *
 * <p>For list pages it queues the next list page (while the current page number
 * is below the reported total) and one detail request per {@code news_id} found
 * in the response. Detail pages are currently not extracted.
 */
@Component
@Slf4j
public class BiShiJieProcessor extends AbstractProcessor {

    /** JSONPath of the total page count in a list response. */
    private static final String ALL_PAGE_PATH = "$.data.allPage";

    /** JSONPath of the current page number in a list response. */
    private static final String CURR_PAGE_PATH = "$.data.currPage";

    /** JSONPath selecting every news id in a list response. */
    private static final String NEWS_ID_PATH = "$.data.data[*].news_id";

    /**
     * Stores the crawler configuration on the base processor.
     *
     * @param config configuration whose rule is read by {@link #process(Page)}
     */
    @Override
    public void init(WebMagicConfig config) {
        super.setConfig(config);
    }

    /**
     * Processes a downloaded page.
     *
     * <p>If the page URL matches the configured list regex, follow-up requests
     * for the next list page and for every listed news item are added to the
     * page. Otherwise the page is treated as a detail page.
     *
     * @param page the downloaded page
     */
    @Override
    public void process(Page page) {
        RuleMatchConfig rule = super.getConfig().getRule();

        if (page.getUrl().regex(rule.getListregex()).match()) {
            // List page: queue pagination first, then the detail links.
            addNextPageRequest(page);
            addDetailRequests(page, rule);
        } else {
            List<DetailxpathConfig> detailxpath = rule.getDetailxpath();
            for (DetailxpathConfig detailxpathConfig : detailxpath) {
                // TODO: field extraction for detail pages is not implemented yet;
                // each configured XPath is expected to be evaluated against the page here.
            }
        }
    }

    /**
     * Queues the next list page when the response reports more pages.
     *
     * <p>Missing or non-numeric pagination fields are logged and skipped so that
     * the detail links of the current page are still collected by the caller.
     */
    private void addNextPageRequest(Page page) {
        String allPage = page.getJson().jsonPath(ALL_PAGE_PATH).get();
        String currPage = page.getJson().jsonPath(CURR_PAGE_PATH).get();
        String url = page.getUrl().get();

        if (allPage == null || currPage == null) {
            log.warn("Pagination fields missing in list response, url={}", url);
            return;
        }

        int currPageNum;
        int allPageNum;
        try {
            currPageNum = Integer.parseInt(currPage);
            allPageNum = Integer.parseInt(allPage);
        } catch (NumberFormatException e) {
            log.warn("Non-numeric pagination fields (currPage={}, allPage={}), url={}",
                    currPage, allPage, url, e);
            return;
        }

        if (currPageNum < allPageNum) {
            // NOTE(review): this rewrites the FIRST run of digits in the URL; it assumes
            // that run is the page number - confirm against the configured list URL.
            String newUrl = url.replaceFirst("\\d+", String.valueOf(currPageNum + 1));
            page.addTargetRequest(newUrl);
        }
    }

    /**
     * Queues one detail request per news id found in the list response.
     *
     * <p>The detail URL template is derived from the configured detail regex by
     * replacing the literal text {@code \d+} with a {@code %s} placeholder.
     */
    private void addDetailRequests(Page page, RuleMatchConfig rule) {
        List<String> allNewsId = page.getJson().jsonPath(NEWS_ID_PATH).all();

        // String.replace is a literal (non-regex) replacement, so this swaps the
        // two-character escape "\d" followed by "+" for the format placeholder.
        // NOTE(review): any other regex syntax in the detail regex stays in the URL
        // as-is - presumably the configured pattern is otherwise a plain URL; verify.
        String newsUrl = rule.getDetailregex().replace("\\d+", "%s");
        for (String newsId : allNewsId) {
            page.addTargetRequest(String.format(newsUrl, newsId));
        }
    }
}
